#
# TCSS 422 - Spring 2013
# Paul Ganea, Ralph Feltis, Tarl Hahn
# Team: Something Awesome (We can't remember the exact name)
# 5/2/2013
#

#
# HTML parser object that collects page text and anchor (href) URLs into lists;
# a keywords list is also kept, but nothing in this class fills it.
#
from HTMLParser import HTMLParser  
    
class MyHTMLParser(HTMLParser):
    """HTML parser that gathers text data and anchor link targets.

    The results accumulate in three plain lists while ``feed()`` runs:

    list     -- every text chunk handed to ``handle_data``, in document order
    keywords -- created empty; nothing in this class appends to it
                (presumably filled in by the caller -- TODO confirm)
    urls     -- the ``href`` value of every ``<a>`` start tag that has one
    """

    def __init__(self):
        # Kept as an explicit base-class call: under Python 2 HTMLParser is
        # an old-style class, so super() would not work there.
        HTMLParser.__init__(self)
        self.list = []
        self.keywords = []
        self.urls = []

    def handle_starttag(self, tag, attrs):
        """Record the ``href`` target of each anchor start tag.

        tag   -- name of the tag being opened
        attrs -- list of (name, value) pairs; value is None when the
                 attribute was written without a value (``<a href>``)
        """
        if tag != "a":
            return
        for name, value in attrs:
            # Skip valueless hrefs so ``urls`` only ever holds strings.
            if name == "href" and value is not None:
                self.urls.append(value)

    def handle_data(self, data):
        """Append a chunk of text found between tags to ``self.list``."""
        self.list.append(data)